from urllib import request
from io import BytesIO
import gzip
from lxml import etree
from bs4 import BeautifulSoup


# Fetch the school search-result page and decode it into a `str` named `html`.
url = 'https://gaokao.chsi.com.cn/sch/search--ss-on,option-qg,searchType-1,start-0.dhtml'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': "zh-CN,zh;q=0.9",
    # Advertise only gzip: urllib never decodes the body for us, and gzip is
    # the only content encoding this script knows how to undo. Offering
    # "deflate" or "br" would let the server answer with bytes we cannot read.
    'Accept-Encoding': 'gzip',
    'Connection': 'keep-alive',
    'Host': 'gaokao.chsi.com.cn',
}
Request = request.Request(url, headers=headers)
# The context manager closes the connection even if reading fails.
with request.urlopen(Request) as response:
    html = response.read()
    content_encoding = response.headers.get('Content-Encoding', '')

# Servers may ignore Accept-Encoding and reply uncompressed, so only gunzip
# when the response actually declares a gzip body.
if 'gzip' in content_encoding.lower():
    html = gzip.decompress(html)
html = html.decode('utf-8')

# Parse the downloaded page and turn each table row into one record dict.
data = BeautifulSoup(html, "lxml")
nodes = data.find_all("tr")

# Text content of a cell whose flag is set; presumably the icon-font glyph the
# site renders as a check mark (TODO confirm against the live page).
flag_glyph = '\ue664'
# Number of <td> cells a data row must have for the positional mapping below.
expected_cells = 9

# `List` collects one dict per school, in page order.
List = []
for node in nodes[1:]:  # the first <tr> is skipped (assumed to be the header row)
    cells = [td.get_text().strip() for td in node.find_all("td")]
    # Rows without the full set of cells (header-only, layout or pagination
    # rows) cannot be mapped by position; skip them rather than crash with
    # IndexError and lose every row parsed so far.
    if len(cells) < expected_cells:
        continue
    List.append({
        '院校名称': cells[0],
        '院校所在地': cells[1],
        '教育行政主管部门': cells[2],
        '院校类型': cells[3],
        '学历层次': cells[4],
        '一流大学': cells[5] == flag_glyph,
        '一流学科': cells[6] == flag_glyph,
        '研究生院': cells[7] == flag_glyph,
        '满意度': cells[8],
    })




